Simulated Data

The data is simulated via the following code. Histogram of each variable is shown below. Each of x2 : x7 has its own data structure while x1, x8:x10 are randomly generated normal variables.

set.seed(1234)
x1 <- rnorm(1000, 0, 1)
x2 <- sample(c(rnorm(500, -3, 1), rnorm(500, 3, 1)), size = 1000)
x3 <- sample(c(rep(-1, 500), rep(1, 500)), size = 1000)
x4 <- sample(c(rnorm(250, -3, 1), rnorm(750, 3, 1)), size = 1000)
x5 <- sample(c(rnorm(330, -5, 1), rnorm(340, 0, 1), rnorm(330, 5, 1)), size = 1000)
x6 <- sample(c(rnorm(450, -5, 1), rnorm(100, 0, 1), rnorm(450, 5, 1)), size = 1000)
x7 <- sample(c(rnorm(500, -5, 1), rnorm(500, 5, 1)), size = 1000)
x8 <- rnorm(1000, 0, 1)
x9 <- rnorm(1000, 0, 1)
x10 <- rnorm(1000, 0, 1)

data_mult <- tibble::tibble(x1 = x1, x2 = x2, x3 = x3, 
                               x4 = x4, x5 = x5, x6 = x6, x7 = x7,
                               x8 = x8, x9 = x9, x10 = x10) %>% map_df(scale)
origin_dt_bi <- data_mult %>% 
  dplyr::select(-x3) %>% 
  gather(names, values) %>%
  mutate(names = as_factor(names))
  

origin_dt_bi %>%
  ggplot(aes(x = values)) +
  geom_histogram(binwidth = 0.3) +
  geom_density(aes(y = 0.3 * ..count..)) +
  facet_wrap(vars(names), ncol = 3)

2D projection on 2 informative variables

Searching method Search_geodesic() is performed on x1, x2, x8:x10 with an additional variable from x3: x7. The following few screenshots are taken at the end of each trial and projection pursuit are able to find the data structure in all the caese.

run_geodesic <- function(var){
  set.seed(12345)
  mult_geodesic <- data_mult[,c(1,2,var,8,9,10)] %>% 
  animate_xy(tour_path = guided_tour(holes(), d = 2,
                                     search_f = search_geodesic_latest),
             rescale = FALSE)

  return(mult_geodesic)
}

2D projection on 3 informative variables

Searching method Search_geodesic() is performed on x1, x8, x9, x10 with three additional variables choosing from x2, x3, x4, x5, x6, x7. Notice that the pattern in x3 is very different from other varaibles, it is always found in the tour, while less distinct informatiove variables are less likely to be found. We could allow for sequential step so that the variables found could be put aside in the next tour. i.e. with x1: x4, x8:x10, x3 is found first, then run the tour again with all the other variables other than x3. See the following examples:

  1. x2, x3, x4: x3 is recognised while x2 and x4 are mixed

  1. x2, x5, x6: without x3, x5 and x6 are more distinct and thus found

  1. x3, x5, x6: when x3 is added back, the tour can’t find x5, x6 again

PCA plot and index trace plot

Similar to 1-tour_optim_documentation.html, PCA plot and index_val tracing plot are provided. With two columns in each projection basis, PCA is done separately for each columns. The following demo uses variable x1, x2, x7, x8, x9, x10.

proj <- mult_geodesic %>% 
  filter(info == "interpolation") %>% pull(basis) %>% tail(1)
proj[[1]]
##              [,1]        [,2]
## [1,] -0.001738144  0.03945460
## [2,]  0.496024430 -0.86384416
## [3,]  0.862833666  0.49212418
## [4,] -0.028429470 -0.05467946
## [5,]  0.077831028  0.06522615
## [6,] -0.051077434  0.05278356
projected <- as.matrix(data_mult[,c(1,2,7,8,9,10)]) %*% proj[[1]] %>% as_tibble()
projected %>% 
  ggplot(aes(x = V1, y = V2)) + 
  geom_point() + 
  ggtitle("without rounding")

PCA plot

# colour by parts of optimisation (static)
mult_releveled <- pca_v1 %>%
  filter(info != "interpolation") 

p1 <- pca_v1 %>% 
  ggplot(aes(x = PC1, y = PC2, col = info)) +
  geom_point() +
  geom_point(data = mult_releveled) +
  theme(aspect.ratio = 1) + 
  ggtitle("PCA plot for v1")


# colour by parts of optimisation (static)
mult_releveled <- pca_v2 %>%
  filter(info != "interpolation") 

p2 <- pca_v2 %>% 
  ggplot(aes(x = PC1, y = PC2, col = info)) +
  geom_point() +
  geom_point(data = mult_releveled) +
  theme(aspect.ratio = 1) + 
  ggtitle("PCA plot for v2")

p1 + p2 + plot_layout(guides = "collect")

Index_val Tracing plot

mult_animate_v1 <- pca_v1 %>%
  mutate(loop = ifelse(info == "start", 1, loop)) %>% 
  group_by(tries, loop, info3) %>%
  mutate(animate_id = group_indices())

mult_animate_v2 <- pca_v2 %>%
  mutate(loop = ifelse(info == "start", 1, loop)) %>% 
  group_by(tries, loop, info3) %>%
  mutate(animate_id = group_indices())

gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

mult_path_v1 <- mult_animate_v1 %>% 
  ungroup() %>% 
  ggplot(aes(x = animate_id, y = index_val, col = info)) + 
  geom_point() + 
  theme(legend.position =  "none")

mult_path_v2 <- mult_animate_v2 %>% 
  ungroup() %>% 
  ggplot(aes(x = animate_id, y = index_val, col = info)) + 
  geom_point() 


last_tries_v1 <- mult_animate_v1$tries[which.max(mult_animate_v1$tries)]
last_tries_v2 <- mult_animate_v2$tries[which.max(mult_animate_v2$tries)]

direction_v1 <- mult_animate_v1 %>% 
  filter(tries == last_tries_v1, info %in% c("direction_search", "best_direction_search")) %>%
  ggplot(aes(x = animate_id, y = index_val, col = info)) + 
  geom_point() + 
  xlim(1, max(mult_animate_v1$animate_id)) + 
  scale_color_manual(values = c(gg_color_hue(6)[2:3], gg_color_hue(6)[5:6])) + 
  theme(legend.position =  "none")

direction_v2 <- mult_animate_v2 %>% 
  filter(tries == last_tries_v2, info %in% c("direction_search", "best_direction_search")) %>%
  ggplot(aes(x = animate_id, y = index_val, col = info)) + 
  geom_point() + 
  xlim(1, max(mult_animate_v2$animate_id)) + 
  scale_color_manual(values = c(gg_color_hue(6)[2:3], gg_color_hue(6)[5:6])) + 
  theme(legend.position =  "none")
  

design <- "
  13
  24
  24
"
direction_v1 + mult_path_v1 + direction_v2 + mult_path_v2 +
  plot_layout(design = design,  guides = "collect")

Side-by-side animation

geo_animate <- mult_animate_v1 %>%
  ggplot(aes(x = PC1, y = PC2, col = info)) +
  geom_point() +
  theme(aspect.ratio = 1, 
        legend.position = "none") +
  gganimate::transition_time(animate_id) +
  gganimate::shadow_mark()

p1 <- gganimate::animate(geo_animate, fps = 2)

index_val_animate <- mult_animate_v1 %>% 
  ggplot(aes(x = animate_id, y = index_val, col = info)) + 
  geom_jitter(width = 0.0005) + 
  theme(aspect.ratio = 1) +
  gganimate::transition_time(animate_id) + 
  gganimate::shadow_mark()

p2 <- gganimate::animate(index_val_animate, fps = 2)

a_mgif <- image_read(p1)
b_mgif <- image_read(p2)

new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for (i in 2:100) {
  combined <- image_append(c(a_mgif[i], b_mgif[i]))
  new_gif <- c(new_gif, combined)
}

new_gif

geo_animate <- mult_animate_v2 %>%
  ggplot(aes(x = PC1, y = PC2, col = info)) +
  geom_point() +
  theme(aspect.ratio = 1, 
        legend.position = "none") +
  gganimate::transition_time(animate_id) +
  gganimate::shadow_mark()

p1 <- gganimate::animate(geo_animate, fps = 2)

index_val_animate <- mult_animate_v2 %>% 
  ggplot(aes(x = animate_id, y = index_val, col = info)) + 
  geom_jitter(width = 0.0005) + 
  theme(aspect.ratio = 1) +
  gganimate::transition_time(animate_id) + 
  gganimate::shadow_mark()

p2 <- gganimate::animate(index_val_animate, fps = 2)

a_mgif <- image_read(p1)
b_mgif <- image_read(p2)

new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for (i in 2:100) {
  combined <- image_append(c(a_mgif[i], b_mgif[i]))
  new_gif <- c(new_gif, combined)
}

new_gif